For the reader: You can click the black “code” box on the right hand side to either see or hide the R code behind analysis results and graphs.

The results are arranged to separate tabs by site/area, so that it’s easy to look at just the results of the site of interest, instead of scrolling accidentally down to a different site and getting confused.

The results for each site consist of four parts: pXRF, particle size distribution, loss on ignition, and colorimetry.

Sites

Ashdod-Yam

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(GGally); # GGpairs
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AY.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all))
##  Area  Type   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Mn 
##     0     0     1     0     0     0    43     1     0     0     0    28     0 
##    Fe    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Nb    Ba 
##     0     1     0     0    27     0     0     2     0    31    45

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-V) %>% 
        select(-As) %>% 
        select(-Nb) %>% 
        select(-Ba)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final)
##  [1] "Area" "Type" "MgO"  "Ti"   "Mn"   "Fe"   "Cu"   "Zn"   "Rb"   "Sr"  
## [11] "Y"    "Zr"

pXRF: K means cluster analysis

# K-means clustering in ggpairs: 2 clusters
km <-kmeans(pxrf_final[3:12], centers = 2)
cluster_x <- as.factor(km$cluster)
ggpairs(pxrf_final[3:12], mapping = aes(col = cluster_x))

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 7), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.3836 1.2233 1.0086 0.84957 0.60483 0.49903 0.44478
## Proportion of Variance 0.5719 0.1506 0.1024 0.07265 0.03682 0.02507 0.01991
## Cumulative Proportion  0.5719 0.7225 0.8249 0.89752 0.93434 0.95941 0.97932
##                           PC8     PC9    PC10
## Standard deviation     0.3538 0.22250 0.17534
## Proportion of Variance 0.0126 0.00498 0.00309
## Cumulative Proportion  0.9919 0.99691 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by area")

autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by sample type")

PCA(pxrf_final[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 47 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=7)

area <- as.factor(pxrf_final[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]

plot(dend, main="HCA with sample areas")
legend("topright", legend = levels(area), fill = cols_a)

# HCA dendrogram, samples color coded by type:
dend2 <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=7)

type <- as.factor(pxrf_final[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
col_type <- cols_t[type]
labels_colors(dend2) <- col_type[order.dendrogram(dend2)]

plot(dend2, main="HCA with sample types")
legend("topright", legend = levels(type), fill = cols_t)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=2)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_AY.xlsx", sep=";")
tga <- read.xlsx("data/tga_AY.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:52          Min.   :8.079   Min.   :10.13   Min.   :1.538  
##  Class :character   1st Qu.:8.566   1st Qu.:11.00   1st Qu.:2.464  
##  Mode  :character   Median :9.006   Median :11.71   Median :2.635  
##                     Mean   :8.994   Mean   :11.55   Mean   :2.559  
##                     3rd Qu.:9.435   3rd Qu.:12.05   3rd Qu.:2.824  
##                     Max.   :9.925   Max.   :12.68   Max.   :3.043  
##    dry_weight     after_550_C     after_950_C      context         
##  Min.   :10.11   Min.   :10.08   Min.   :10.05   Length:52         
##  1st Qu.:10.97   1st Qu.:10.94   1st Qu.:10.86   Class :character  
##  Median :11.67   Median :11.62   Median :11.53   Mode  :character  
##  Mean   :11.52   Mean   :11.47   Mean   :11.41                     
##  3rd Qu.:12.00   3rd Qu.:11.96   3rd Qu.:11.93                     
##  Max.   :12.65   Max.   :12.60   Max.   :12.52                     
##      type          
##  Length:52         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:53          Length:53          Length:53          Length:53         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950           context         
##  Length:53          Length:53          Length:53          Length:53         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      type          
##  Length:53         
##  Class :character  
##  Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi)

boxplot(c950~context, data=loi)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(loi$c950)
## $stats
## [1] 0.398150 1.528018 2.198513 3.162314 4.889189
## 
## $n
## [1] 52
## 
## $conf
## [1] 1.840428 2.556598
## 
## $out
## [1]  8.041755 11.960444
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)

# Removing duplicates from the next graph
rownames(loi)
##  [1] "AY-1"    "AY-2"    "AY-3"    "AY-4"    "AY-5"    "AY-6"    "AY-7"   
##  [8] "AY-8"    "AY-9"    "AY-10"   "AY-11"   "AY-12"   "AY-13"   "AY-14"  
## [15] "AY-15"   "AY-16"   "AY-17"   "AY-18"   "AY-19"   "AY-20"   "AY-21"  
## [22] "AY-22"   "AY-23"   "AY-24"   "AY-25"   "AY-26"   "AY-27"   "AY-28"  
## [29] "AY-29"   "AY-30"   "AY-31"   "AY-32"   "AY-33"   "AY-34"   "AY-35"  
## [36] "AY-36"   "AY-37"   "AY-38"   "AY-39"   "AY-40"   "AY-41"   "AY-42"  
## [43] "AY-50"   "AY-51"   "AY-52"   "AY-53"   "AY-54"   "AY-50_2" "AY-51_2"
## [50] "AY-52_2" "AY-53_2" "AY-54_2"
loi <- subset(loi[1:47, ])

# Color by context
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by type
ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(type), colour = factor(context))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga)

boxplot(c950~context, data=tga)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(tga$c950)
## $stats
## [1] 0.7233976 1.5855926 2.3958197 3.4621816 5.4590326
## 
## $n
## [1] 50
## 
## $conf
## [1] 1.976504 2.815135
## 
## $out
## [1] 7.994718
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])

ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(tga$c550)

barplot(tga$c950)

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AY.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :37.56   Min.   : 7.97   Min.   :16.81  
##  1st Qu.:42.13   1st Qu.: 9.72   1st Qu.:19.76  
##  Median :43.64   Median :10.38   Median :21.18  
##  Mean   :43.92   Mean   :10.49   Mean   :21.22  
##  3rd Qu.:45.91   3rd Qu.:11.29   3rd Qu.:22.34  
##  Max.   :48.24   Max.   :13.53   Max.   :25.03
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Ashdod-Yam: Byzantine

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);

library(GGally); 
library(corrplot); 
library(tidyr); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AYB.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Turn any "LOD":s and missing values to "NA":s to allow scaling
# Removing "Na", S", "Cl", "P" and "Ba" already
values <- c("MgO", "Al2O3", "SiO2", 
            "K2O", "CaO", "Ti", "V", "Cr", 
            "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As", 
            "Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", 
            "Rh", "Pd", "Ag", "Cd", "Sn", "Sb", 
            "La", "Ce", "Hf", "Ta", "W", "Pt", "Au", 
            "Hg", "Tl", "Pb", "Bi", "Th", "U")

pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)

# Scaling the data with standard z-score method
scaled_pxrf <- as.data.frame(scale(pxrf_NA))

# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(scaled_pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"

# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)

# Removing columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)

# Removing columns if they contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)

pxrf_no_na:

summary(pxrf_no_na)
##         Area        Type       Al2O3                 SiO2        
##  Byzantine:7   ceiling:3   Min.   :-0.6649899   Min.   :-0.7149  
##                floor  :1   1st Qu.:-0.5260944   1st Qu.:-0.5619  
##                PEM    :3   Median :-0.3729803   Median :-0.3191  
##                            Mean   : 0.0000000   Mean   : 0.0000  
##                            3rd Qu.:-0.0009942   3rd Qu.: 0.1869  
##                            Max.   : 2.0921476   Max.   : 1.7841  
##       CaO                Ti                   Mn                Fe         
##  Min.   :-1.6463   Min.   :-0.6002745   Min.   :-0.5364   Min.   :-0.9003  
##  1st Qu.:-0.5655   1st Qu.:-0.4813714   1st Qu.:-0.4011   1st Qu.:-0.6185  
##  Median : 0.3098   Median :-0.3873934   Median :-0.3370   Median :-0.4877  
##  Mean   : 0.0000   Mean   : 0.0000000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.6541   3rd Qu.: 0.0005995   3rd Qu.:-0.1593   3rd Qu.: 0.5536  
##  Max.   : 1.1593   Max.   : 1.9492115   Max.   : 1.9942   Max.   : 1.5177  
##        Ni                Cu                Zn                Sr         
##  Min.   :-0.9780   Min.   :-1.3184   Min.   :-0.7633   Min.   :-0.7906  
##  1st Qu.:-0.4890   1st Qu.:-0.2432   1st Qu.:-0.5817   1st Qu.:-0.3883  
##  Median :-0.2574   Median :-0.1984   Median :-0.1408   Median :-0.2674  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.3089   3rd Qu.: 0.1600   3rd Qu.: 0.3261   3rd Qu.: 0.4585  
##  Max.   : 1.5957   Max.   : 1.6832   Max.   : 1.4154   Max.   : 0.9176  
##        Zr          
##  Min.   :-0.69215  
##  1st Qu.:-0.54451  
##  Median :-0.24621  
##  Mean   : 0.00000  
##  3rd Qu.: 0.02497  
##  Max.   : 1.97745

pxrf_all:

summary(pxrf_all)
##         Area        Type        MgO              Al2O3           
##  Byzantine:7   ceiling:3   Min.   :-0.9836   Min.   :-0.6649899  
##                floor  :1   1st Qu.:-0.4419   1st Qu.:-0.5260944  
##                PEM    :3   Median :-0.2397   Median :-0.3729803  
##                            Mean   :-0.1222   Mean   : 0.0000000  
##                            3rd Qu.: 0.2820   3rd Qu.:-0.0009942  
##                            Max.   : 0.7831   Max.   : 2.0921476  
##                            NA's   :1                             
##       SiO2              K2O                CaO                Ti            
##  Min.   :-0.7149   Min.   :-1.03235   Min.   :-1.6463   Min.   :-0.6002745  
##  1st Qu.:-0.5619   1st Qu.:-0.54671   1st Qu.:-0.5655   1st Qu.:-0.4813714  
##  Median :-0.3191   Median :-0.25114   Median : 0.3098   Median :-0.3873934  
##  Mean   : 0.0000   Mean   : 0.06213   Mean   : 0.0000   Mean   : 0.0000000  
##  3rd Qu.: 0.1869   3rd Qu.: 0.29801   3rd Qu.: 0.6541   3rd Qu.: 0.0005995  
##  Max.   : 1.7841   Max.   : 1.84282   Max.   : 1.1593   Max.   : 1.9492115  
##                    NA's   :2                                                
##        Mn                Fe                Ni                Cu         
##  Min.   :-0.5364   Min.   :-0.9003   Min.   :-0.9780   Min.   :-1.3184  
##  1st Qu.:-0.4011   1st Qu.:-0.6185   1st Qu.:-0.4890   1st Qu.:-0.2432  
##  Median :-0.3370   Median :-0.4877   Median :-0.2574   Median :-0.1984  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.:-0.1593   3rd Qu.: 0.5536   3rd Qu.: 0.3089   3rd Qu.: 0.1600  
##  Max.   : 1.9942   Max.   : 1.5177   Max.   : 1.5957   Max.   : 1.6832  
##                                                                         
##        Zn                As                 Rb                 Sr         
##  Min.   :-0.7633   Min.   :-0.88791   Min.   :-0.83065   Min.   :-0.7906  
##  1st Qu.:-0.5817   1st Qu.:-0.56643   1st Qu.:-0.55508   1st Qu.:-0.3883  
##  Median :-0.1408   Median :-0.03062   Median :-0.12204   Median :-0.2674  
##  Mean   : 0.0000   Mean   : 0.09185   Mean   :-0.08267   Mean   : 0.0000  
##  3rd Qu.: 0.3261   3rd Qu.: 0.82668   3rd Qu.: 0.25195   3rd Qu.: 0.4585  
##  Max.   : 1.4154   Max.   : 1.13285   Max.   : 0.90151   Max.   : 0.9176  
##                    NA's   :1          NA's   :1                           
##        Y                  Zr                 Nb                Ag   
##  Min.   :-0.82699   Min.   :-0.69215   Min.   :-0.2048   Min.   :0  
##  1st Qu.:-0.64944   1st Qu.:-0.54451   1st Qu.:-0.2048   1st Qu.:0  
##  Median :-0.29435   Median :-0.24621   Median :-0.2048   Median :0  
##  Mean   : 0.06074   Mean   : 0.00000   Mean   :-0.2048   Mean   :0  
##  3rd Qu.: 0.59337   3rd Qu.: 0.02497   3rd Qu.:-0.2048   3rd Qu.:0  
##  Max.   : 1.48110   Max.   : 1.97745   Max.   :-0.2048   Max.   :0  
##  NA's   :2                             NA's   :6         NA's   :6  
##        Pb          
##  Min.   :-0.00135  
##  1st Qu.: 0.29028  
##  Median : 0.39289  
##  Mean   : 0.40100  
##  3rd Qu.: 0.50361  
##  Max.   : 0.81954  
##  NA's   :3

pXRF: K means cluster analysis

# Drop the more dubious elements
pxrf_1 <- pxrf_no_na %>% select(-Al2O3)
pxrf_1 <- pxrf_1 %>% select(-CaO)

# Optimal number of clusters in K-means analysis: 2
k_max <- 6
twcss <- sapply(1:k_max, function(k){kmeans(pxrf_1[3:11], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')

pXRF: PCA

PCA with only the no-NA:s elements:

# PCA
pca_1 <- prcomp(pxrf_1[3:11])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6       PC7
## Standard deviation     2.2445 0.8845 0.75353 0.40411 0.30308 0.06678 1.703e-16
## Proportion of Variance 0.7578 0.1177 0.08542 0.02457 0.01382 0.00067 0.000e+00
## Cumulative Proportion  0.7578 0.8755 0.96094 0.98551 0.99933 1.00000 1.000e+00
# PCA1 plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA limited elements")

autoplot(pca_1, data=pxrf_1, colour='Type', shape = FALSE, label = TRUE,  main = "PCA grouped by area")

PCA with all the feasible elements:

# PCA with all elements, NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0

pca_2 <- prcomp(pxrf_all[3:21])
summary(pca_2)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5    PC6       PC7
## Standard deviation     2.6967 1.4173 1.03547 0.69371 0.65343 0.3604 2.873e-16
## Proportion of Variance 0.6384 0.1763 0.09413 0.04225 0.03748 0.0114 0.000e+00
## Cumulative Proportion  0.6384 0.8147 0.90887 0.95111 0.98860 1.0000 1.000e+00
# PCA2 plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA more elements")

autoplot(pca_2, data=pxrf_all, colour='Type', shape = FALSE, label = TRUE,  main = "PCA more elements grouped by area")

pXRF: HCA

# HCA

# HCA dendrogam 1
dend <- 
    pxrf_1[3:11] %>%                    # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=4) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=4) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2: 
HCA.ward4 <- hclust(dist(pxrf_1[3:11], method="euclid"), method="ward.D2")
plot(HCA.ward4, main="HCA with clusters")
rect.hclust(HCA.ward4, k=4)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AYB.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_AYB.xlsx", sep=";")
tga <- read.xlsx("data/tga_AYB.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:7           Min.   :8.382   Min.   :10.92   Min.   :2.257  
##  Class :character   1st Qu.:8.555   1st Qu.:11.39   1st Qu.:2.613  
##  Mode  :character   Median :8.666   Median :11.50   Median :2.880  
##                     Mean   :8.943   Mean   :11.68   Mean   :2.739  
##                     3rd Qu.:9.442   3rd Qu.:12.13   3rd Qu.:2.905  
##                     Max.   :9.560   Max.   :12.31   Max.   :3.002  
##    dry_weight     after_550_C     after_950_C      context         
##  Min.   :10.83   Min.   :10.62   Min.   :10.50   Length:7          
##  1st Qu.:11.33   1st Qu.:11.25   1st Qu.:11.04   Class :character  
##  Median :11.45   Median :11.39   Median :11.21   Mode  :character  
##  Mean   :11.63   Mean   :11.55   Mean   :11.24                     
##  3rd Qu.:12.11   3rd Qu.:12.05   3rd Qu.:11.43                     
##  Max.   :12.26   Max.   :12.24   Max.   :12.06                     
##      type          
##  Length:7          
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date      Batch               Wet           
##  Length:7           Min.   :44536   Length:7           Length:7          
##  Class :character   1st Qu.:44536   Class :character   Class :character  
##  Mode  :character   Median :44536   Mode  :character   Mode  :character  
##                     Mean   :44536                                        
##                     3rd Qu.:44536                                        
##                     Max.   :44536                                        
##      Dry              Mass_550           Mass_950           context         
##  Length:7           Length:7           Length:7           Length:7          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      type          
##  Length:7          
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)

# Color by type      
ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(context), colour = factor(type))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)

# Color by type      
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(type))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AYB.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :34.56   Min.   : 2.480   Min.   : 6.98  
##  1st Qu.:38.93   1st Qu.: 5.215   1st Qu.:12.73  
##  Median :45.30   Median : 5.540   Median :13.28  
##  Mean   :46.69   Mean   : 6.211   Mean   :14.56  
##  3rd Qu.:50.95   3rd Qu.: 6.675   3rd Qu.:16.21  
##  Max.   :67.20   Max.   :11.680   Max.   :23.75
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Israel

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);

library(GGally); 
library(corrplot); 
library(tidyr); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters

# Read the raw data
pxrf <- read.xlsx("data/pxrf_israel.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Turn any "LOD":s and missing values to "NA":s to allow scaling
# Removing "Na", S", "Cl", "P" and "Ba" already
values <- c("MgO", "Al2O3", "SiO2", 
            "K2O", "CaO", "Ti", "V", "Cr", 
            "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As", 
            "Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", 
            "Rh", "Pd", "Ag", "Cd", "Sn", "Sb", 
            "La", "Ce", "Hf", "Ta", "W", "Pt", "Au", 
            "Hg", "Tl", "Pb", "Bi", "Th", "U")

pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)

# Scaling the data with standard z-score method
scaled_pxrf <- as.data.frame(scale(pxrf_NA))

# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(scaled_pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"

# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)

# Removing columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)

# Removing columns if they contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)

pxrf_no_na:

summary(pxrf_no_na)
##              Area              Type         MgO              Al2O3        
##  Ad-Halom      :9   floor        : 2   Min.   :-1.2243   Min.   :-1.1535  
##  Rishon Le'Zion:1   floor plaster: 1   1st Qu.:-0.3655   1st Qu.:-0.7296  
##  Tell Iztabba  :9   lime plaster : 4   Median : 0.3253   Median :-0.1502  
##                     mudbrick     :12   Mean   : 0.2570   Mean   :-0.1498  
##                                        3rd Qu.: 0.7669   3rd Qu.: 0.3186  
##                                        Max.   : 2.2752   Max.   : 1.0340  
##       CaO                Ti                 Mn                Fe          
##  Min.   :-1.1324   Min.   :-1.62849   Min.   :-1.1564   Min.   :-1.21031  
##  1st Qu.:-0.3895   1st Qu.:-0.59656   1st Qu.:-0.7510   1st Qu.:-0.75649  
##  Median : 0.2485   Median :-0.02908   Median :-0.1021   Median : 0.09126  
##  Mean   : 0.1365   Mean   :-0.17581   Mean   :-0.1364   Mean   :-0.06331  
##  3rd Qu.: 0.6235   3rd Qu.: 0.38876   3rd Qu.: 0.2178   3rd Qu.: 0.47399  
##  Max.   : 2.0327   Max.   : 1.15358   Max.   : 1.4046   Max.   : 1.24389  
##        Cu                 Zn                 Sr                Zr         
##  Min.   :-1.23132   Min.   :-1.47722   Min.   :-1.1311   Min.   :-1.9036  
##  1st Qu.:-0.40202   1st Qu.:-0.56202   1st Qu.:-0.5905   1st Qu.:-1.2329  
##  Median : 0.06850   Median :-0.11447   Median :-0.2214   Median :-0.1911  
##  Mean   : 0.03885   Mean   : 0.07566   Mean   :-0.1056   Mean   :-0.3629  
##  3rd Qu.: 0.38611   3rd Qu.: 0.91136   3rd Qu.: 0.3090   3rd Qu.: 0.4183  
##  Max.   : 1.19776   Max.   : 1.39913   Max.   : 1.7253   Max.   : 1.4929

pxrf_all:

summary(pxrf_all)
##              Area              Type         MgO              Al2O3        
##  Ad-Halom      :9   floor        : 2   Min.   :-1.2243   Min.   :-1.1535  
##  Rishon Le'Zion:1   floor plaster: 1   1st Qu.:-0.3655   1st Qu.:-0.7296  
##  Tell Iztabba  :9   lime plaster : 4   Median : 0.3253   Median :-0.1502  
##                     mudbrick     :12   Mean   : 0.2570   Mean   :-0.1498  
##                                        3rd Qu.: 0.7669   3rd Qu.: 0.3186  
##                                        Max.   : 2.2752   Max.   : 1.0340  
##                                                                           
##       SiO2               K2O                 CaO                Ti          
##  Min.   :-1.63885   Min.   :-0.379762   Min.   :-1.1324   Min.   :-1.62849  
##  1st Qu.:-0.60735   1st Qu.: 0.007179   1st Qu.:-0.3895   1st Qu.:-0.59656  
##  Median :-0.05503   Median : 0.329753   Median : 0.2485   Median :-0.02908  
##  Mean   :-0.11085   Mean   : 0.326436   Mean   : 0.1365   Mean   :-0.17581  
##  3rd Qu.: 0.33797   3rd Qu.: 0.477998   3rd Qu.: 0.6235   3rd Qu.: 0.38876  
##  Max.   : 1.39686   Max.   : 1.190872   Max.   : 2.0327   Max.   : 1.15358  
##  NA's   :1          NA's   :8                                               
##        Mn                Fe                 Ni                 Cu          
##  Min.   :-1.1564   Min.   :-1.21031   Min.   :-0.85215   Min.   :-1.23132  
##  1st Qu.:-0.7510   1st Qu.:-0.75649   1st Qu.:-0.40208   1st Qu.:-0.40202  
##  Median :-0.1021   Median : 0.09126   Median :-0.18228   Median : 0.06850  
##  Mean   :-0.1364   Mean   :-0.06331   Mean   : 0.01659   Mean   : 0.03885  
##  3rd Qu.: 0.2178   3rd Qu.: 0.47399   3rd Qu.: 0.26255   3rd Qu.: 0.38611  
##  Max.   : 1.4046   Max.   : 1.24389   Max.   : 1.17838   Max.   : 1.19776  
##                                       NA's   :11                           
##        Zn                 Rb                Sr                Y           
##  Min.   :-1.47722   Min.   :-0.9609   Min.   :-1.1311   Min.   :-0.85071  
##  1st Qu.:-0.56202   1st Qu.:-0.3269   1st Qu.:-0.5905   1st Qu.:-0.47216  
##  Median :-0.11447   Median : 0.3071   Median :-0.2214   Median :-0.09361  
##  Mean   : 0.07566   Mean   : 0.2332   Mean   :-0.1056   Mean   :-0.30009  
##  3rd Qu.: 0.91136   3rd Qu.: 0.6637   3rd Qu.: 0.3090   3rd Qu.:-0.02478  
##  Max.   : 1.39913   Max.   : 1.4364   Max.   : 1.7253   Max.   : 0.04405  
##                     NA's   :6                           NA's   :16        
##        Zr                Nb                Ag         
##  Min.   :-1.9036   Min.   :-0.1096   Min.   :0.07923  
##  1st Qu.:-1.2329   1st Qu.: 0.2421   1st Qu.:0.07923  
##  Median :-0.1911   Median : 0.5937   Median :0.07923  
##  Mean   :-0.3629   Mean   : 0.5937   Mean   :0.07923  
##  3rd Qu.: 0.4183   3rd Qu.: 0.9454   3rd Qu.:0.07923  
##  Max.   : 1.4929   Max.   : 1.2971   Max.   :0.07923  
##                    NA's   :17        NA's   :18

pXRF: K means cluster analysis

# Drop the more dubious elements
pxrf_1 <- pxrf_no_na %>% select(-Al2O3)
pxrf_1 <- pxrf_1 %>% select(-CaO)

# Optimal number of clusters in K-means analysis: 2
k_max <- 8
twcss <- sapply(1:k_max, function(k){kmeans(pxrf_1[3:10], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')

# K-means clustering in ggpairs: 2 clusters
km <-kmeans(pxrf_1[3:10], centers = 4)
cluster_x <- as.factor(km$cluster)
ggpairs(pxrf_1, mapping = aes(col = cluster_x))

pXRF: PCA

PCA with only the no-NA:s elements:

# PCA
pca_1 <- prcomp(pxrf_1[3:10])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5    PC6     PC7
## Standard deviation     1.7341 1.1365 0.8140 0.51966 0.42169 0.2369 0.15638
## Proportion of Variance 0.5463 0.2346 0.1204 0.04906 0.03231 0.0102 0.00444
## Cumulative Proportion  0.5463 0.7809 0.9013 0.95038 0.98268 0.9929 0.99732
##                            PC8
## Standard deviation     0.12142
## Proportion of Variance 0.00268
## Cumulative Proportion  1.00000
# PCA1 plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA limited elements")

autoplot(pca_1, data=pxrf_1, colour='Area', shape = FALSE, label = TRUE,  main = "PCA grouped by area")

autoplot(pca_1, data=pxrf_1, colour='Type', shape = FALSE, label = TRUE,  main = "PCA grouped by type")

PCA with all the feasible elements:

# PCA with all elements, NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0

pca_2 <- prcomp(pxrf_all[3:19])
summary(pca_2)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.0585 1.2500 0.8709 0.73570 0.63683 0.41024 0.36958
## Proportion of Variance 0.5252 0.1937 0.0940 0.06709 0.05027 0.02086 0.01693
## Cumulative Proportion  0.5252 0.7189 0.8129 0.88000 0.93027 0.95113 0.96806
##                            PC8     PC9   PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.26815 0.23622 0.2126 0.18137 0.17097 0.09826 0.07320
## Proportion of Variance 0.00891 0.00692 0.0056 0.00408 0.00362 0.00120 0.00066
## Cumulative Proportion  0.97698 0.98389 0.9895 0.99357 0.99720 0.99839 0.99906
##                           PC15    PC16      PC17
## Standard deviation     0.07015 0.05176 0.0002973
## Proportion of Variance 0.00061 0.00033 0.0000000
## Cumulative Proportion  0.99967 1.00000 1.0000000
# PCA2 plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA more elements")

autoplot(pca_2, data=pxrf_all, colour='Area', shape = FALSE, label = TRUE,  main = "PCA more elements grouped by area")

autoplot(pca_2, data=pxrf_all, colour='Type', shape = FALSE, label = TRUE,  main = "PCA more elements grouped by type")

pXRF: HCA

# HCA dendrogam 1
dend <- 
    pxrf_1[3:10] %>%                    # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=4) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=6) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2: 
HCA.ward4 <- hclust(dist(pxrf_1[3:10], method="euclid"), method="ward.D2")
plot(HCA.ward4, main="HCA with clusters")
rect.hclust(HCA.ward4, k=6)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_israel.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2)

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_israel.xlsx", sep=";")
tga <- read.xlsx("data/tga_israel.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:19          Min.   :7.921   Min.   :10.55   Min.   :2.081  
##  Class :character   1st Qu.:8.557   1st Qu.:11.23   1st Qu.:2.403  
##  Mode  :character   Median :9.278   Median :11.78   Median :2.584  
##                     Mean   :9.017   Mean   :11.63   Mean   :2.611  
##                     3rd Qu.:9.452   3rd Qu.:11.92   3rd Qu.:2.828  
##                     Max.   :9.787   Max.   :12.62   Max.   :3.231  
##    dry_weight     after_550_C     after_950_C       context         
##  Min.   :10.52   Min.   :10.42   Min.   : 9.773   Length:19         
##  1st Qu.:11.23   1st Qu.:11.18   1st Qu.:10.376   Class :character  
##  Median :11.76   Median :11.70   Median :10.971   Mode  :character  
##  Mean   :11.60   Mean   :11.53   Mean   :10.884                     
##  3rd Qu.:11.88   3rd Qu.:11.78   3rd Qu.:11.322                     
##  Max.   :12.56   Max.   :12.49   Max.   :12.371                     
##      type          
##  Length:19         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950           context         
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      type          
##  Length:12         
##  Class :character  
##  Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi)

boxplot(c950~context, data=loi)

# Color by context
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by type      
ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(context), colour = factor(type))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga)

boxplot(c950~context, data=tga)

# Color by context
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_israel.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :43.75   Min.   : 3.160   Min.   :13.85  
##  1st Qu.:56.08   1st Qu.: 3.770   1st Qu.:14.71  
##  Median :64.30   Median : 4.350   Median :15.85  
##  Mean   :63.19   Mean   : 6.649   Mean   :17.27  
##  3rd Qu.:71.51   3rd Qu.: 6.280   3rd Qu.:17.70  
##  Max.   :75.34   Max.   :19.530   Max.   :25.84
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Palaepaphos

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(GGally); # GGpairs
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_PP.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all))
##  Area  Type   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Mn 
##     0     0    34    34    34    35    35    34    36    34     0    41     0 
##    Fe    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Rh    Ag 
##     0     0     0    34    48    35    34    36     0    51    49

pxrf_final: Final analysis data set with only the selected elements, all samples (soil and published included)

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-V) %>% 
        select(-As) %>% 
        select(-Rh) %>% 
        select(-Ag)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final)
##  [1] "Area" "Type" "MgO"  "Ti"   "Mn"   "Fe"   "Cu"   "Zn"   "Rb"   "Sr"  
## [11] "Y"    "Zr"

pxrf_MB: Final analysis data set for NEW MUDBRICK samples only (= no soil or published samples)

pxrf_MB <- pxrf_final[c(1:11), ]
rownames(pxrf_MB)
##  [1] "PP-10" "PP-11" "PP-12" "PP-13" "PP-9"  "PP-19" "PP-14" "PP-15" "PP-16"
## [10] "PP-17" "PP-18"

pxrf_soil: Final analysis data set for NEW samples only (= no published samples, yes soil)

pxrf_soil <- pxrf_final[-c(12:45), ]
rownames(pxrf_soil)
##  [1] "PP-10" "PP-11" "PP-12" "PP-13" "PP-9"  "PP-19" "PP-14" "PP-15" "PP-16"
## [10] "PP-17" "PP-18" "PP-1"  "PP-2"  "PP-3"  "PP-6"  "PP-4"  "PP-7"  "PP-5" 
## [19] "PP-8"

pxrf_MB2: Final analysis data set for ALL MUDBRICK samples (= no soil)

pxrf_MB2 <- pxrf_final[c(1:45), ]
rownames(pxrf_MB2)
##  [1] "PP-10" "PP-11" "PP-12" "PP-13" "PP-9"  "PP-19" "PP-14" "PP-15" "PP-16"
## [10] "PP-17" "PP-18" "1"     "10"    "11"    "12"    "13"    "14"    "15"   
## [19] "16"    "17"    "18"    "19"    "2"     "20"    "21"    "22"    "23"   
## [28] "24"    "25"    "26"    "27"    "28"    "29"    "3"     "30"    "31"   
## [37] "32"    "33"    "34"    "4"     "5"     "6"     "7"     "8"     "9"

pXRF: K means cluster analysis

Only the “new” mudbrick samples

# K-means clustering in ggpairs: 2 clusters
km <-kmeans(pxrf_MB[3:12], centers = 3)
cluster_x <- as.factor(km$cluster)
ggpairs(pxrf_MB[3:12], mapping = aes(col = cluster_x))

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_MB[3:12], 3), data = pxrf_MB, label = TRUE, label.size = 3)

pXRF: PCA

PCA with only new mudbrick samples:

# Elements
colnames(pxrf_MB[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_MB[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.5967 1.0886 0.99328 0.85704 0.40479 0.33892 0.20666
## Proportion of Variance 0.6735 0.1184 0.09854 0.07336 0.01637 0.01147 0.00427
## Cumulative Proportion  0.6735 0.7918 0.89037 0.96373 0.98010 0.99157 0.99584
##                            PC8     PC9    PC10
## Standard deviation     0.17917 0.09521 0.02255
## Proportion of Variance 0.00321 0.00091 0.00005
## Cumulative Proportion  0.99904 0.99995 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")

autoplot(pca_1, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

PCA(pxrf_MB[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 11 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

PCA with soil samples:

# Elements
colnames(pxrf_soil[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_2 <- prcomp(pxrf_soil[3:12])
summary(pca_2)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.0533 1.6050 1.0187 0.82293 0.69376 0.39645 0.33871
## Proportion of Variance 0.4498 0.2748 0.1107 0.07225 0.05135 0.01677 0.01224
## Cumulative Proportion  0.4498 0.7247 0.8354 0.90763 0.95898 0.97575 0.98799
##                            PC8     PC9   PC10
## Standard deviation     0.24806 0.21529 0.0685
## Proportion of Variance 0.00657 0.00495 0.0005
## Cumulative Proportion  0.99455 0.99950 1.0000
# PCA plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")

autoplot(pca_2, data=pxrf_soil, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

autoplot(pca_2, data=pxrf_soil, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by sample type")

PCA(pxrf_soil[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 19 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

PCA with all the mudbrick samples

Including the previously published ones, no soil

# Elements
colnames(pxrf_MB2[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_3 <- prcomp(pxrf_MB2[3:12])
summary(pca_3)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     1.8473 1.1712 0.76106 0.60306 0.44231 0.40557 0.30364
## Proportion of Variance 0.5429 0.2182 0.09215 0.05786 0.03112 0.02617 0.01467
## Cumulative Proportion  0.5429 0.7611 0.85326 0.91112 0.94224 0.96841 0.98307
##                            PC8     PC9    PC10
## Standard deviation     0.22882 0.19928 0.11965
## Proportion of Variance 0.00833 0.00632 0.00228
## Cumulative Proportion  0.99140 0.99772 1.00000
# PCA plots
biplot(pca_3, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")

autoplot(pca_3, data=pxrf_MB2, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

PCA(pxrf_MB2[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 45 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

HCA including only new samples:

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB %>%                         # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=4)

area <- as.factor(pxrf_MB[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]

plot(dend, main="HCA with sample areas")
legend("topright", legend = levels(area), fill = cols_a)

# HCA dendrogram, samples color coded by type:
dend2 <- 
    pxrf_soil %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=4)

type <- as.factor(pxrf_soil[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
col_type <- cols_t[type]
labels_colors(dend2) <- col_type[order.dendrogram(dend2)]

plot(dend2, main="HCA with new samples by type")
legend("topright", legend = levels(type), fill = cols_t)

HCA including all mudbrick samples :

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB2 %>%                         # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=4)

area <- as.factor(pxrf_MB2[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]

plot(dend, main="HCA with sample areas")
legend("topright", legend = levels(area), fill = cols_a)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_PP.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Creating subsets for only mudbricks
MB_grain <- grain_01[c(1:11),]
MB_context <- grain[c(1:11),]

# Ternary plots
ggtern(data=MB_grain, aes(x=Clay, y=Sand, z=Silt, color=MB_context$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(MB_grain)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_PP.xlsx", sep=";")
tga <- read.xlsx("data/tga_PP.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet;weight       sample;wet   
##  Length:27          Min.   :8.100   Min.   : 9.637   Min.   :1.537  
##  Class :character   1st Qu.:8.639   1st Qu.:10.859   1st Qu.:2.193  
##  Mode  :character   Median :8.997   Median :11.184   Median :2.371  
##                     Mean   :8.988   Mean   :11.274   Mean   :2.286  
##                     3rd Qu.:9.454   3rd Qu.:11.844   3rd Qu.:2.453  
##                     Max.   :9.560   Max.   :12.301   Max.   :2.785  
##    dry_weight      after_550_C      after_950_C      context         
##  Min.   : 9.595   Min.   : 9.484   Min.   : 9.23   Length:27         
##  1st Qu.:10.813   1st Qu.:10.721   1st Qu.:10.27   Class :character  
##  Median :11.132   Median :11.019   Median :10.59   Mode  :character  
##  Mean   :11.216   Mean   :11.113   Mean   :10.67                     
##  3rd Qu.:11.777   3rd Qu.:11.681   3rd Qu.:11.18                     
##  Max.   :12.218   Max.   :12.076   Max.   :11.56                     
##      type          
##  Length:27         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:30          Length:30          Length:30          Length:30         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950           context         
##  Length:30          Length:30          Length:30          Length:30         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      type          
##  Length:30         
##  Class :character  
##  Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi)

boxplot(c950~context, data=loi)

# Removing duplicates from the next graph and creating MB only subset
loi <- subset(loi[1:19, ])
loi_MB <- subset(loi[9:19, ])
rownames(loi)
##  [1] "PP-1"  "PP-2"  "PP-3"  "PP-4"  "PP-5"  "PP-6"  "PP-7"  "PP-8"  "PP-9" 
## [10] "PP-10" "PP-11" "PP-12" "PP-13" "PP-14" "PP-15" "PP-16" "PP-17" "PP-18"
## [19] "PP-19"
rownames(loi_MB)
##  [1] "PP-9"  "PP-10" "PP-11" "PP-12" "PP-13" "PP-14" "PP-15" "PP-16" "PP-17"
## [10] "PP-18" "PP-19"
# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi_MB)

boxplot(c950~context, data=loi_MB)

# Color by context
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by type      
ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(context), colour = factor(type))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# MB only
ggplot(loi_MB, 
      aes(c550, c950, label = rownames(loi_MB), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "MB only organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi_MB, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga)

boxplot(c950~context, data=tga)

# Removing duplicates and creating MB only subset
tga_MB <- subset(tga[9:20, ])
tga_MB <- subset(tga_MB[-4, ])
rownames(tga_MB)
##  [1] "PP-9"  "PP-10" "PP-11" "PP-12" "PP-13" "PP-14" "PP-15" "PP-16" "PP-17"
## [10] "PP-18" "PP-19"
# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga_MB)

boxplot(c950~context, data=tga_MB)

# Color by context
ggplot(tga_MB, 
      aes(c550, c950, label = rownames(tga_MB), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga_MB, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_PP.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :62.38   Min.   : 7.27   Min.   :19.90  
##  1st Qu.:63.47   1st Qu.: 8.50   1st Qu.:21.36  
##  Median :64.19   Median : 9.34   Median :21.72  
##  Mean   :65.50   Mean   : 9.29   Mean   :21.92  
##  3rd Qu.:65.78   3rd Qu.:10.56   3rd Qu.:22.96  
##  Max.   :72.00   Max.   :10.90   Max.   :23.90
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Artaxata

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);

library(GGally); 
library(corrplot); 
library(tidyr); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AA.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Turn any "LOD":s and missing values to "NA":s to allow scaling
# Removing "Na", S", "Cl", "P" and "Ba" already
values <- c("MgO", "Al2O3", "SiO2", 
            "K2O", "CaO", "Ti", "V", "Cr", 
            "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As", 
            "Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", 
            "Rh", "Pd", "Ag", "Cd", "Sn", "Sb", 
            "La", "Ce", "Hf", "Ta", "W", "Pt", "Au", 
            "Hg", "Tl", "Pb", "Bi", "Th", "U")

pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)

# Scaling the data with standard z-score method
scaled_pxrf <- as.data.frame(scale(pxrf_NA))

# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(scaled_pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"

# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)

# Removing columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)

# Removing columns if they contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)

pxrf_no_na:

summary(pxrf_no_na)
##               Area         Type         MgO              Al2O3         
##  Hill II        :3   mudbrick:10   Min.   :-0.7957   Min.   :-1.47906  
##  Phase I        :2                 1st Qu.:-0.4773   1st Qu.:-0.41015  
##  Phase II       :2                 Median :-0.2741   Median : 0.05631  
##  Phase II or III:2                 Mean   : 0.0000   Mean   : 0.00000  
##  Urartian silo  :1                 3rd Qu.: 0.3559   3rd Qu.: 0.45276  
##                                    Max.   : 1.9131   Max.   : 1.38944  
##       SiO2              K2O                CaO                   Ti          
##  Min.   :-1.6654   Min.   :-1.55669   Min.   :-1.4333259   Min.   :-1.87044  
##  1st Qu.:-0.3827   1st Qu.:-0.68787   1st Qu.:-0.6680205   1st Qu.:-0.31750  
##  Median : 0.3100   Median : 0.03848   Median :-0.0001026   Median : 0.02437  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000000   Mean   : 0.00000  
##  3rd Qu.: 0.6156   3rd Qu.: 0.75515   3rd Qu.: 0.7641718   3rd Qu.: 0.81009  
##  Max.   : 0.8556   Max.   : 1.11418   Max.   : 1.2091486   Max.   : 0.86883  
##        Mn                 Fe                 Ni                 Cu         
##  Min.   :-1.03797   Min.   :-0.73814   Min.   :-0.94570   Min.   :-1.2582  
##  1st Qu.:-0.30897   1st Qu.:-0.28874   1st Qu.:-0.66424   1st Qu.:-0.5607  
##  Median :-0.09355   Median :-0.08696   Median :-0.07318   Median : 0.1094  
##  Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.: 0.21238   3rd Qu.: 0.08823   3rd Qu.: 0.45456   3rd Qu.: 0.3966  
##  Max.   : 1.57654   Max.   : 1.60860   Max.   : 1.50299   Max.   : 1.5865  
##        Zn                Rb                Sr                Y          
##  Min.   :-0.8847   Min.   :-1.2036   Min.   :-1.4059   Min.   :-1.0149  
##  1st Qu.:-0.3508   1st Qu.:-0.4637   1st Qu.:-0.3803   1st Qu.:-0.3542  
##  Median :-0.0839   Median :-0.2006   Median :-0.1555   Median :-0.1163  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.2720   3rd Qu.: 0.3256   3rd Qu.: 0.4915   3rd Qu.: 0.1216  
##  Max.   : 1.5813   Max.   : 1.4601   Max.   : 1.4314   Max.   : 1.7338  
##        Zr         
##  Min.   :-1.1158  
##  1st Qu.:-0.5166  
##  Median :-0.3001  
##  Mean   : 0.0000  
##  3rd Qu.: 0.1279  
##  Max.   : 1.9054

pxrf_all:

summary(pxrf_all)
##               Area         Type         MgO              Al2O3         
##  Hill II        :3   mudbrick:10   Min.   :-0.7957   Min.   :-1.47906  
##  Phase I        :2                 1st Qu.:-0.4773   1st Qu.:-0.41015  
##  Phase II       :2                 Median :-0.2741   Median : 0.05631  
##  Phase II or III:2                 Mean   : 0.0000   Mean   : 0.00000  
##  Urartian silo  :1                 3rd Qu.: 0.3559   3rd Qu.: 0.45276  
##                                    Max.   : 1.9131   Max.   : 1.38944  
##                                                                        
##       SiO2              K2O                CaO                   Ti          
##  Min.   :-1.6654   Min.   :-1.55669   Min.   :-1.4333259   Min.   :-1.87044  
##  1st Qu.:-0.3827   1st Qu.:-0.68787   1st Qu.:-0.6680205   1st Qu.:-0.31750  
##  Median : 0.3100   Median : 0.03848   Median :-0.0001026   Median : 0.02437  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000000   Mean   : 0.00000  
##  3rd Qu.: 0.6156   3rd Qu.: 0.75515   3rd Qu.: 0.7641718   3rd Qu.: 0.81009  
##  Max.   : 0.8556   Max.   : 1.11418   Max.   : 1.2091486   Max.   : 0.86883  
##                                                                              
##        V                  Cr                  Mn                 Fe          
##  Min.   :-0.46003   Min.   :-0.686179   Min.   :-1.03797   Min.   :-0.73814  
##  1st Qu.:-0.43566   1st Qu.:-0.459188   1st Qu.:-0.30897   1st Qu.:-0.28874  
##  Median :-0.19193   Median :-0.232198   Median :-0.09355   Median :-0.08696  
##  Mean   :-0.09932   Mean   :-0.232198   Mean   : 0.00000   Mean   : 0.00000  
##  3rd Qu.: 0.02742   3rd Qu.:-0.005208   3rd Qu.: 0.21238   3rd Qu.: 0.08823  
##  Max.   : 0.56361   Max.   : 0.221783   Max.   : 1.57654   Max.   : 1.60860  
##  NA's   :5          NA's   :8                                                
##        Ni                 Cu                Zn                As          
##  Min.   :-0.94570   Min.   :-1.2582   Min.   :-0.8847   Min.   :-1.03932  
##  1st Qu.:-0.66424   1st Qu.:-0.5607   1st Qu.:-0.3508   1st Qu.:-0.21581  
##  Median :-0.07318   Median : 0.1094   Median :-0.0839   Median :-0.05111  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.05869  
##  3rd Qu.: 0.45456   3rd Qu.: 0.3966   3rd Qu.: 0.2720   3rd Qu.: 0.44299  
##  Max.   : 1.50299   Max.   : 1.5865   Max.   : 1.5813   Max.   : 1.10179  
##                                                         NA's   :1         
##        Rb                Sr                Y                 Zr         
##  Min.   :-1.2036   Min.   :-1.4059   Min.   :-1.0149   Min.   :-1.1158  
##  1st Qu.:-0.4637   1st Qu.:-0.3803   1st Qu.:-0.3542   1st Qu.:-0.5166  
##  Median :-0.2006   Median :-0.1555   Median :-0.1163   Median :-0.3001  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.3256   3rd Qu.: 0.4915   3rd Qu.: 0.1216   3rd Qu.: 0.1279  
##  Max.   : 1.4601   Max.   : 1.4314   Max.   : 1.7338   Max.   : 1.9054  
##                                                                         
##        Nb          
##  Min.   :-0.65796  
##  1st Qu.:-0.50862  
##  Median :-0.06060  
##  Mean   :-0.09379  
##  3rd Qu.:-0.06060  
##  Max.   : 0.93500  
##  NA's   :4

pXRF: K means cluster analysis

# Drop the more dubious elements
pxrf_1 <- pxrf_no_na %>% select(-Al2O3)
pxrf_1 <- pxrf_1 %>% select(-CaO)

# Optimal number of clusters in K-means analysis: 2
k_max <- 8
twcss <- sapply(1:k_max, function(k){kmeans(pxrf_1[3:12], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')

# K-means clustering in ggpairs: 2 clusters
km <-kmeans(pxrf_1[3:12], centers = 2)
cluster_x <- as.factor(km$cluster)
ggpairs(pxrf_1, mapping = aes(col = cluster_x))

pXRF: PCA

PCA with only the no-NA:s elements:

# PCA
pca_1 <- prcomp(pxrf_1[3:15])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.4147 1.0889 0.9306 0.48309 0.37987 0.31662 0.24466
## Proportion of Variance 0.6901 0.1403 0.1025 0.02762 0.01708 0.01186 0.00708
## Cumulative Proportion  0.6901 0.8305 0.9330 0.96059 0.97767 0.98953 0.99662
##                            PC8     PC9      PC10
## Standard deviation     0.15630 0.06455 7.266e-17
## Proportion of Variance 0.00289 0.00049 0.000e+00
## Cumulative Proportion  0.99951 1.00000 1.000e+00
# PCA1 plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA limited elements")

autoplot(pca_1, data=pxrf_1, colour='Area', shape = FALSE, label = TRUE,  main = "PCA grouped by area")

PCA with all the feasible elements:

# PCA with all elements, NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0

pca_2 <- prcomp(pxrf_all[3:21])
summary(pca_2)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.6421 1.2026 0.98838 0.84185 0.45012 0.39002 0.30457
## Proportion of Variance 0.6529 0.1353 0.09136 0.06628 0.01895 0.01423 0.00868
## Cumulative Proportion  0.6529 0.7881 0.87946 0.94574 0.96469 0.97891 0.98759
##                            PC8     PC9      PC10
## Standard deviation     0.28180 0.23084 2.238e-16
## Proportion of Variance 0.00743 0.00498 0.000e+00
## Cumulative Proportion  0.99502 1.00000 1.000e+00
# PCA2 plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA more elements")

autoplot(pca_2, data=pxrf_all, colour='Area', shape = FALSE, label = TRUE,  main = "PCA more elements grouped by area")

pXRF: HCA

# HCA

# HCA dendrogam 1
dend <- 
    pxrf_1[3:12] %>%                    # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=4) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=4) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2: 
HCA.ward4 <- hclust(dist(pxrf_1[3:12], method="euclid"), method="ward.D2")
plot(HCA.ward4, main="HCA with clusters")
rect.hclust(HCA.ward4, k=4)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AA.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_AA.xlsx", sep=";")
tga <- read.xlsx("data/tga_AA.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:10          Min.   :8.066   Min.   :10.70   Min.   :1.923  
##  Class :character   1st Qu.:8.461   1st Qu.:10.93   1st Qu.:2.326  
##  Mode  :character   Median :8.995   Median :11.48   Median :2.558  
##                     Mean   :8.892   Mean   :11.39   Mean   :2.496  
##                     3rd Qu.:9.174   3rd Qu.:11.75   3rd Qu.:2.701  
##                     Max.   :9.890   Max.   :12.18   Max.   :2.842  
##    dry_weight     after_550_C     after_950_C      context         
##  Min.   :10.58   Min.   :10.47   Min.   :10.31   Length:10         
##  1st Qu.:10.85   1st Qu.:10.78   1st Qu.:10.60   Class :character  
##  Median :11.43   Median :11.33   Median :11.16   Mode  :character  
##  Mean   :11.32   Mean   :11.22   Mean   :11.07                     
##  3rd Qu.:11.69   3rd Qu.:11.58   3rd Qu.:11.42                     
##  Max.   :12.13   Max.   :12.06   Max.   :11.94                     
##      type          
##  Length:10         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950           context         
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      type          
##  Length:12         
##  Class :character  
##  Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi)

boxplot(c950~context, data=loi)

# Color by context
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga)

boxplot(c950~context, data=tga)

# Color by context
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AA.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :51.37   Min.   :3.880   Min.   :15.92  
##  1st Qu.:57.73   1st Qu.:4.402   1st Qu.:16.49  
##  Median :58.79   Median :4.555   Median :16.93  
##  Mean   :58.18   Mean   :5.067   Mean   :17.32  
##  3rd Qu.:59.58   3rd Qu.:4.695   3rd Qu.:17.32  
##  Max.   :61.57   Max.   :8.670   Max.   :21.47
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)